df_od <- read_csv(here::here("data", "VSRR_Provisional_Drug_Overdose_Death_Counts.csv"))
df_od$Date <- parse_date_time(paste(tolower(df_od$Month), df_od$Year), orders=c("by"))
df_od$NumDeaths <- df_od$`Data Value`
df_od <- df_od %>% filter(`Indicator`=='Number of Deaths' & `State` != 'US')
df_od <- df_od %>% left_join(., pop, by=c("State"="State"))
df_od$FatalitiesPerCap <- df_od$`Data Value`/df_od$pop*100


p1 <- ggplot(df_od, aes(x=Date, y=`FatalitiesPerCap`, group=`StateName`, colour=`StateName`)) +
  geom_line() +
  gghighlight(max_highlight = 8L, max(`FatalitiesPerCap`),  use_direct_label=FALSE, label_key=`FatalitiesPerCap`) +
  labs(title="WEST VIRGINIA'S OPIOID PROBLEM STANDS \nIN STARK CONTRAST TO REST OF COUNTRY", 
       subtitle = "DRUG OVERDOSES PER CAPITA OVER THE YEARS",
       caption="SOURCE: NATIONAL CENTER FOR HEALTH STATISTICS",
       x="DATE", y="% FATAL DRUG OVERDOSES PER CAPITA") +
  jtheme + jfill + jcolor

p1

Arguably no one has been hit harder by the opioid epidemic than those living in the rust belt and the rural south. One can see above that rural America (Alabama, Arkansas, Kentucky, Tennesee, and West Virginia) comprise some of the states hit hardest per capita by opioid related deaths. Many factors contribute to this scourge on the region, including economic disenfranchisement and high rates of disability enrollment.

df_ods_by_state <- aggregate(df_od, by=list(df_od$StateName), FUN=mean, na.rm=TRUE)
df_ods_by_state$StateName <- df_ods_by_state$Group.1
df_ods <- df_ods_by_state %>% select(StateName, FatalitiesPerCap)
mean_ods <- mean(df_ods$FatalitiesPerCap)

df_ods$Above <- df_ods$FatalitiesPerCap > mean_ods

df_ods$Above[df_ods$Above == TRUE] <- "BLUE"
df_ods$Above[df_ods$Above == FALSE] <- "RED"


p2 <- ggplot(df_ods, aes(x=reorder(StateName, FatalitiesPerCap), y=FatalitiesPerCap, label=format(FatalitiesPerCap, digits=2, nsmall=2))) + 
  geom_point(stat='identity', aes(col=Above), size=6)  +
  scale_color_manual(name="FATALITIES BY OVERDOSE (% OF POP)", 
                     labels = c("BELOW AVERAGE", "ABOVE AVERAGE")) +
  labs(title="NEW YORK AND WEST VIRGINIA LIE ON OPPOSITE \nPOLES OF THE OPIOID EPIDEMIC", 
       subtitle="FATAL DRUG OVERDOSES BY STATE",
       caption="SOURCE: NATIONAL CENTER FOR HEALTH STATISTICS",
       x="STATE", y="FATALITIES PER CAPITA (% OF POP)") + 
  ylim(0, 2.0) +
  coord_flip() + 
  jtheme + jfill + jcolor
p2

While West Virginia is most intensely affected among US states, no state is immune to the epidemic; even in New York, the state whose opioid-related death rate is lowest per capita, one half of one percent of the population will die from a drug-related overdose. The standard deviation of opioid abuse’s impact on states is smaller than one might guess if they are only reading articles (https://www.nytimes.com/interactive/2018/us/west-virginia-opioids.html) about the plight of West Virginia. While West Virginia is an outlier as shown in the above plot, this is a nationwide epidemic.

DRUG_NAMES <- c("VICODIN", "OXYCODONE", "OXYCONTIN", "PERCOCET", 
                "OPANA", "KADIAN", "AVINZA", "FENTANYL")

fname_drug_utilization <- "State_Drug_Utilization_Data_"

# First time processing of DataFrame
#df_util <- read_csv(here::here("data", paste(fname_drug_utilization, "2008", ".csv", sep=""))) %>% filter(`Product Name` %in% DRUG_NAMES)
#for (i in 2009:2018) {
#  df_util <- rbind(df_util, read_csv(here::here("data", paste(fname_drug_utilization, i, ".csv", sep=""))) %>% filter(`Product Name` %in% DRUG_NAMES)) 
#}

#readr::write_csv(df_util, here::here("data", "df_util.csv"))
df_util <- read_csv(here::here("data", "df_util.csv"))
df_rx <- aggregate(df_util[,c("Number of Prescriptions", "Total Amount Reimbursed")], by=list(df_util$Year, df_util$`Product Name`), FUN="sum", na.rm=TRUE)
df_rx$`Drug Name` <- df_rx$Group.2


p3 <-  df_rx %>% ggplot(aes(Group.1, `Total Amount Reimbursed`, size=`Number of Prescriptions`, colour=`Drug Name`)) +
  geom_point(alpha = 0.7) +
  geom_line(size=1) + 
  scale_y_continuous(labels = dollar) +
  labs(title = "PAINKILLER REIMBURSEMENTS SHRINK WHILE \nPRESCRIPTIONS REMAIN FAIRLY CONSTANT",
       subtitle = "EXPENSE REIMBURSEMENT & PRESCRIPTIONS OVER TIME",
       caption = "SOURCE: DATA.MEDICAID.GOV",
       x = 'YEAR', y = 'TOTAL $ REIMBURSED') +
  jtheme + jfill + jcolor

p3$labels$size <- "NUMBER OF PRESCRIPTIONS"
p3$labels$colour <- "DRUG NAME"

p3

2015 was a clear reckoning for the US Government’s reimbursement of presciption opioids, those contributing most towards the opioid epidemic. The data above shows the prescriptions in cicle size over time, along with the total amount reimbursed, thanks to medicaid data. Economists might anticipate that as less and less reimbusement money is available for prescription opioids, doctors would prescribe fewer pills. Though we do see slightly larger circles as reimbursements go up, it seems that prescriptions aren’t very responsive to drug reimbursement. This relationship may show different results in comparing reimbursement to the number of filled prescriptions at the pharmacy.

df_util$StateName <- abbr2state(df_util$State)

# Drug Overdoses
DATA_OVERDOSE <- "VSRR_Provisional_Drug_Overdose_Death_Counts.csv"
df_od <- read_csv(here::here("data", DATA_OVERDOSE))
df_od$Date <- parse_date_time(paste(tolower(df_od$Month), df_od$Year), orders=c("by"))
df_od$NumDeaths <- df_od$`Data Value`
df_od <- df_od %>% filter(`Indicator`=='Number of Deaths' & `State` != 'US')
df_od <- df_od %>% left_join(., pop, by=c("State"="State")) %>% left_join(., df_util, by=c("State"="State", "Year"="Year"))

df_od$FatalitiesPer <- df_od$`Data Value`*1000/df_od$pop
df_od$`Number of Prescriptions` <- df_od$`Number of Prescriptions`*1000/df_od$pop

yr=2018

df_ods_by_state <- df_od %>% filter(Year == yr)
df_ods_by_state <- aggregate(list(FatalitiesPer=df_ods_by_state$FatalitiesPer, NumRxPer=df_ods_by_state$`Number of Prescriptions`), by=list(StateName=df_ods_by_state$StateName.x), FUN="sum", na.rm=TRUE)

# Import Cartogram Polygons, merge drug overdose data
spdf <- geojson_read(here::here("data", "us_states_hexgrid.geojson"),  what = "sp")
spdf@data = spdf@data %>% mutate(google_name = gsub(" \\(United States\\)", "", google_name)) 
spdf@data = spdf@data %>% left_join(., df_ods_by_state, by=c("google_name"="StateName"))
cartogram <- cartogram_cont(spdf, 'FatalitiesPer')

carto_fortified <- tidy(cartogram, region = "google_name") %>% left_join(. , cartogram@data, by=c("id"="google_name")) 
centers <- cbind.data.frame(data.frame(gCentroid(cartogram, byid=TRUE), id=cartogram@data$iso3166_2))   

p_green <- ggplot() +
    geom_polygon(data = carto_fortified, aes(fill = FatalitiesPer, x = long, y = lat, group = group) , size=0.05, alpha=0.9, color="#f5f5f9") +
    scale_fill_gradientn(colours=brewer.pal(9, "Greens"), name="OPIOID RELATED DEATHS", guide=guide_legend( keyheight = unit(3, units = "mm"), keywidth=unit(15, units = "mm"), title.position = 'top', label.position = "bottom") ) +
    geom_text(data=centers, aes(x=x, y=y, label=id), color="#444444", size=3, alpha=0.6) +
    theme_void() +
    labs( title=paste("WHERE OVERDOSE DEATHS HIT HARDEST\n",yr),
          subtitle="CARTOGRAM OF DEATHS ACROSS THE US",
          caption="SOURCE: VSSR, DATA.MEDICAID.GOV") +
    jtheme_nogrid + 
    coord_map()  
p_blue <- ggplot() +
    geom_polygon(data = carto_fortified, aes(fill = NumRxPer, x = long, y = lat, group = group) , size=0.05, alpha=0.9, color="#f5f5f9") +
    scale_fill_gradientn(colours=brewer.pal(9, "Blues"), name="PRESCRIPTION RATES", guide=guide_legend( keyheight = unit(3, units = "mm"), keywidth=unit(15, units = "mm"), title.position = 'top', label.position = "bottom") ) +
    geom_text(data=centers, aes(x=x, y=y, label=id), color="#444444", size=3, alpha=0.6) +
    theme_void() +
    labs( title=paste("WHERE DRUGS LEAVE THE HOSPITAL\n",yr),
          subtitle="CARTOGRAM OF KEY OPIOID PRESCRIPTION RATES ACROSS THE US",
          caption="SOURCE: VSSR, DATA.MEDICAID.GOV") +
    jtheme_nogrid +
    coord_map()  


#grid.arrange(p_green, p_blue, nrow = 2)
p_green

The rust belt in 2018 saw the worst of opioid related drug overdoses. This likely has to do with smuggling routes, with major hubs in cities like Chicago. The rural west appears to be relatively minimally affected, with states along the Sierra Nevadas showing low rates of overdose deaths. In the Northeast, results are mixed, with coastal mid-atlantic states faring much better than the rural border states like New Hampshire.

p_blue

Correlation to states with high prescription rates doesn’t tell much of a story when comparing cartograms. Maryland had by far the most opioid prescriptions, followed by a cluster of states in the Southwest, omitting Utah, whose large Mormon population dampens its statistics related to drug utilization. There doesn’t appear to be a relationship within given years between opioid prescription and abuse within states. This could hint at a black market, or that most people are not getting addicted to opioids from injury.

US <- map_data("world") %>% filter(region=="USA")
data=us.cities

df_util$`NUMBER OF PRESCRIPTIONS` = df_util$`Number of Prescriptions`
df_util$`PRODUCT NAME` = df_util$`Product Name`
unique(df_util$`Product Name`)
## [1] "KADIAN"    "AVINZA"    "OXYCODONE" "OXYCONTIN" "PERCOCET"  "FENTANYL" 
## [7] "OPANA"     "VICODIN"
df_util %>% filter(df_util$`Product Name` %in% c("OXYCODONE", "OXYCONTIN")) %>%
  #arrange(`Number of Prescriptions`) %>%
  #mutate( name=factor(name, unique(name))) %>%
  #mutate(pop=pop/1000000) %>%
  ggplot() +
    geom_polygon(data = US, aes(x=long, y = lat, group = group), fill="grey", alpha=0.3) +
    geom_point(width=2, height=2, aes(x=Longitude, y=Latitude, size=`NUMBER OF PRESCRIPTIONS`, color=`PRODUCT NAME`), shape=20, stroke=FALSE) +
    scale_size_continuous(name="NUMBER OF PRESCRIPTIONS", range=c(1,12)) +
    #scale_alpha_continuous(name="Year", range=c(0.1, .7)) +
    xlim(-125, -70) + ylim(25,50) + coord_map() + 
    guides(color=guide_legend(title="PRODUCT NAME")) +
    jtheme_nogrid + jcolor +
    labs(title = "OXY'S MOVEMENT SHOWS A LULL\nBETWEEN 2012-2014", subtitle='PRESCRIPTIONS THROUGH THE YEARS: {frame_time}', caption="SOURCE: MEDICAID.GOV") +
  #transition_states(year, transition_length = 2, state_length=3) +
  transition_time(Year) +
    ease_aes('linear', interval = 1.0) 

Shown above are the proliferation of prescriptions of two of the most dangerously addictive opioids, Oxycodone and Oxycontin, by their site of prescription over the past ten years from 2008 to 2018. Circle sizes show relative number of prescriptions for each location overlayed on the US map. Alaska and Hawaii were omitted from this graphic, and were very low in total number of prescriptions. Interestingly, there is a noticeable lull between the years of 2012 and 2014, where fewer prescriptions are doled out. Additionally, one might expect these to follow population segments, however, while there is a dense corridor of prescriptions on the east coast, the west coast does not display similar levels of opioid prescription rates at any point during the sampled time period.

df_fda <- read_tsv(here::here("data", "fda", "Submissions.txt"))
df_sub_class <- read_tsv(here::here("data", "fda", "SubmissionClass_Lookup.txt"))
df_fda <- df_fda %>% left_join(., df_sub_class, by=c("SubmissionClassCodeID"="SubmissionClassCodeID"))

df_fda <- df_fda %>% filter(!(SubmissionClassCodeDescription %in% c("Labeling", "Not Applicable", "Supplement", "Efficacy", "REMS", "Medical Gas", NA)))
df_fda$Year <- format(as.Date(df_fda$SubmissionStatusDate, format="%Y-%m-%d"),"%Y")
df <- df_fda %>% select(Year, "SubmissionClassCode")
df$ones <- 1

df <- aggregate(df$ones, by=list(df$Year, "Submission Type"=df$SubmissionClassCode), FUN="sum", na.rm=TRUE)
df$Year <- strtoi(df$Group.1)
df$Total <- df$x

df_fda$Year <- strtoi(df_fda$Year)

df %>% ggplot(aes(x=`Submission Type`, y=`Total`, fill=`Submission Type`, color=`Submission Type`)) +
    geom_bar(stat='identity') +
    labs(title = "FDA DRUG SUBMISSION TYPES", subtitle='SUBMISSION TRENDS THROUGH THE YEARS: {frame_time}', caption="SOURCE: FDA.GOV")+
  guides(colour=guide_legend(nrow=30)) + jtheme + ylim(0,100) + 
  theme(axis.text.x = element_text(angle=90, hjust=1)) +
  transition_states(year, transition_length = 2, state_length=3) +
  transition_time(Year) +
  ease_aes('linear', interval = .1) 

While over the years, various types of drug submissions have fluctuated, the early 2000s showed major spikes in “Bioequivalent” drug submission type, new drugs to perform similar functions as existing drugs. Other submission types, for context are “New Molecular Entities” (Type 1), “New Active Ingredient” (Type 2), “New Dosage Form” (Type 3), “New Combination” (Type 4), “New Formulation/Manufacturer” (Type 5), etc. Note that in the 70s and 80s, New Manufacturers spiked. In the 2000s, Bioequivalence, New Dosage Forms, and New Combinations spiked most. This appears to show a shift from acquisitions of drug manufacturers into rebranding of existing drugs, and pivots towards different dosages. Further research could shed light on the nature of Type 3 dosage changes, and if the FDA has approved dosage increases at an extraordinary rate.